// The BSD 1-Clause License (BSD-1-Clause)
//
// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     1. Redistributions of source code must retain the above copyright
//        notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package field_curve448

// The file provides arithmetic on the field Z/(2^448 - 2^224 - 1) using
// unsaturated 64-bit integer arithmetic.  It is derived primarily
// from the machine generated Golang output from the fiat-crypto project.
//
// While the base implementation is provably correct, this implementation
// makes no such claims as the port and optimizations were done by hand.
//
// TODO:
//  * When fiat-crypto supports it, using a saturated 64-bit limbs
//    instead of 56-bit limbs will be faster, though the gains are
//    minimal unless adcx/adox/mulx are used.

import fiat "core:crypto/_fiat"
import "core:math/bits"

Loose_Field_Element :: distinct [8]u64
Tight_Field_Element :: distinct [8]u64

@(rodata)
FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0, 0, 0, 0}
@(rodata)
FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0, 0, 0, 0}

_addcarryx_u56 :: #force_inline proc "contextless" (
	arg1: fiat.u1,
	arg2, arg3: u64,
) -> (
	out1: u64,
	out2: fiat.u1,
) {
	x1 := ((u64(arg1) + arg2) + arg3)
	x2 := (x1 & 0xffffffffffffff)
	x3 := fiat.u1((x1 >> 56))
	out1 = x2
	out2 = x3
	return
}

_subborrowx_u56 :: #force_inline proc "contextless" (
	arg1: fiat.u1,
	arg2, arg3: u64,
) -> (
	out1: u64,
	out2: fiat.u1,
) {
	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
	x2 := fiat.u1((x1 >> 56))
	x3 := (u64(x1) & 0xffffffffffffff)
	out1 = x3
	out2 = (0x0 - fiat.u1(x2))
	return
}

fe_carry_mul :: proc "contextless" (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
	x2, x1 := bits.mul_u64(arg1[7], arg2[7])
	x4, x3 := bits.mul_u64(arg1[7], arg2[6])
	x6, x5 := bits.mul_u64(arg1[7], arg2[5])
	x8, x7 := bits.mul_u64(arg1[6], arg2[7])
	x10, x9 := bits.mul_u64(arg1[6], arg2[6])
	x12, x11 := bits.mul_u64(arg1[5], arg2[7])
	x14, x13 := bits.mul_u64(arg1[7], arg2[7])
	x16, x15 := bits.mul_u64(arg1[7], arg2[6])
	x18, x17 := bits.mul_u64(arg1[7], arg2[5])
	x20, x19 := bits.mul_u64(arg1[6], arg2[7])
	x22, x21 := bits.mul_u64(arg1[6], arg2[6])
	x24, x23 := bits.mul_u64(arg1[5], arg2[7])
	x26, x25 := bits.mul_u64(arg1[7], arg2[7])
	x28, x27 := bits.mul_u64(arg1[7], arg2[6])
	x30, x29 := bits.mul_u64(arg1[7], arg2[5])
	x32, x31 := bits.mul_u64(arg1[7], arg2[4])
	x34, x33 := bits.mul_u64(arg1[7], arg2[3])
	x36, x35 := bits.mul_u64(arg1[7], arg2[2])
	x38, x37 := bits.mul_u64(arg1[7], arg2[1])
	x40, x39 := bits.mul_u64(arg1[6], arg2[7])
	x42, x41 := bits.mul_u64(arg1[6], arg2[6])
	x44, x43 := bits.mul_u64(arg1[6], arg2[5])
	x46, x45 := bits.mul_u64(arg1[6], arg2[4])
	x48, x47 := bits.mul_u64(arg1[6], arg2[3])
	x50, x49 := bits.mul_u64(arg1[6], arg2[2])
	x52, x51 := bits.mul_u64(arg1[5], arg2[7])
	x54, x53 := bits.mul_u64(arg1[5], arg2[6])
	x56, x55 := bits.mul_u64(arg1[5], arg2[5])
	x58, x57 := bits.mul_u64(arg1[5], arg2[4])
	x60, x59 := bits.mul_u64(arg1[5], arg2[3])
	x62, x61 := bits.mul_u64(arg1[4], arg2[7])
	x64, x63 := bits.mul_u64(arg1[4], arg2[6])
	x66, x65 := bits.mul_u64(arg1[4], arg2[5])
	x68, x67 := bits.mul_u64(arg1[4], arg2[4])
	x70, x69 := bits.mul_u64(arg1[3], arg2[7])
	x72, x71 := bits.mul_u64(arg1[3], arg2[6])
	x74, x73 := bits.mul_u64(arg1[3], arg2[5])
	x76, x75 := bits.mul_u64(arg1[2], arg2[7])
	x78, x77 := bits.mul_u64(arg1[2], arg2[6])
	x80, x79 := bits.mul_u64(arg1[1], arg2[7])
	x82, x81 := bits.mul_u64(arg1[7], arg2[4])
	x84, x83 := bits.mul_u64(arg1[7], arg2[3])
	x86, x85 := bits.mul_u64(arg1[7], arg2[2])
	x88, x87 := bits.mul_u64(arg1[7], arg2[1])
	x90, x89 := bits.mul_u64(arg1[6], arg2[5])
	x92, x91 := bits.mul_u64(arg1[6], arg2[4])
	x94, x93 := bits.mul_u64(arg1[6], arg2[3])
	x96, x95 := bits.mul_u64(arg1[6], arg2[2])
	x98, x97 := bits.mul_u64(arg1[5], arg2[6])
	x100, x99 := bits.mul_u64(arg1[5], arg2[5])
	x102, x101 := bits.mul_u64(arg1[5], arg2[4])
	x104, x103 := bits.mul_u64(arg1[5], arg2[3])
	x106, x105 := bits.mul_u64(arg1[4], arg2[7])
	x108, x107 := bits.mul_u64(arg1[4], arg2[6])
	x110, x109 := bits.mul_u64(arg1[4], arg2[5])
	x112, x111 := bits.mul_u64(arg1[4], arg2[4])
	x114, x113 := bits.mul_u64(arg1[3], arg2[7])
	x116, x115 := bits.mul_u64(arg1[3], arg2[6])
	x118, x117 := bits.mul_u64(arg1[3], arg2[5])
	x120, x119 := bits.mul_u64(arg1[2], arg2[7])
	x122, x121 := bits.mul_u64(arg1[2], arg2[6])
	x124, x123 := bits.mul_u64(arg1[1], arg2[7])
	x126, x125 := bits.mul_u64(arg1[7], arg2[0])
	x128, x127 := bits.mul_u64(arg1[6], arg2[1])
	x130, x129 := bits.mul_u64(arg1[6], arg2[0])
	x132, x131 := bits.mul_u64(arg1[5], arg2[2])
	x134, x133 := bits.mul_u64(arg1[5], arg2[1])
	x136, x135 := bits.mul_u64(arg1[5], arg2[0])
	x138, x137 := bits.mul_u64(arg1[4], arg2[3])
	x140, x139 := bits.mul_u64(arg1[4], arg2[2])
	x142, x141 := bits.mul_u64(arg1[4], arg2[1])
	x144, x143 := bits.mul_u64(arg1[4], arg2[0])
	x146, x145 := bits.mul_u64(arg1[3], arg2[4])
	x148, x147 := bits.mul_u64(arg1[3], arg2[3])
	x150, x149 := bits.mul_u64(arg1[3], arg2[2])
	x152, x151 := bits.mul_u64(arg1[3], arg2[1])
	x154, x153 := bits.mul_u64(arg1[3], arg2[0])
	x156, x155 := bits.mul_u64(arg1[2], arg2[5])
	x158, x157 := bits.mul_u64(arg1[2], arg2[4])
	x160, x159 := bits.mul_u64(arg1[2], arg2[3])
	x162, x161 := bits.mul_u64(arg1[2], arg2[2])
	x164, x163 := bits.mul_u64(arg1[2], arg2[1])
	x166, x165 := bits.mul_u64(arg1[2], arg2[0])
	x168, x167 := bits.mul_u64(arg1[1], arg2[6])
	x170, x169 := bits.mul_u64(arg1[1], arg2[5])
	x172, x171 := bits.mul_u64(arg1[1], arg2[4])
	x174, x173 := bits.mul_u64(arg1[1], arg2[3])
	x176, x175 := bits.mul_u64(arg1[1], arg2[2])
	x178, x177 := bits.mul_u64(arg1[1], arg2[1])
	x180, x179 := bits.mul_u64(arg1[1], arg2[0])
	x182, x181 := bits.mul_u64(arg1[0], arg2[7])
	x184, x183 := bits.mul_u64(arg1[0], arg2[6])
	x186, x185 := bits.mul_u64(arg1[0], arg2[5])
	x188, x187 := bits.mul_u64(arg1[0], arg2[4])
	x190, x189 := bits.mul_u64(arg1[0], arg2[3])
	x192, x191 := bits.mul_u64(arg1[0], arg2[2])
	x194, x193 := bits.mul_u64(arg1[0], arg2[1])
	x196, x195 := bits.mul_u64(arg1[0], arg2[0])
	x197, x198 := bits.add_u64(x43, x31, u64(0x0))
	x199, _ := bits.add_u64(x44, x32, u64(fiat.u1(x198)))
	x201, x202 := bits.add_u64(x53, x197, u64(0x0))
	x203, _ := bits.add_u64(x54, x199, u64(fiat.u1(x202)))
	x205, x206 := bits.add_u64(x61, x201, u64(0x0))
	x207, _ := bits.add_u64(x62, x203, u64(fiat.u1(x206)))
	x209, x210 := bits.add_u64(x153, x205, u64(0x0))
	x211, _ := bits.add_u64(x154, x207, u64(fiat.u1(x210)))
	x213, x214 := bits.add_u64(x163, x209, u64(0x0))
	x215, _ := bits.add_u64(x164, x211, u64(fiat.u1(x214)))
	x217, x218 := bits.add_u64(x175, x213, u64(0x0))
	x219, _ := bits.add_u64(x176, x215, u64(fiat.u1(x218)))
	x221, x222 := bits.add_u64(x189, x217, u64(0x0))
	x223, _ := bits.add_u64(x190, x219, u64(fiat.u1(x222)))
	x225 := ((x221 >> 56) | ((x223 << 8) & 0xffffffffffffffff))
	x226 := (x221 & 0xffffffffffffff)
	x227, x228 := bits.add_u64(x89, x81, u64(0x0))
	x229, _ := bits.add_u64(x90, x82, u64(fiat.u1(x228)))
	x231, x232 := bits.add_u64(x97, x227, u64(0x0))
	x233, _ := bits.add_u64(x98, x229, u64(fiat.u1(x232)))
	x235, x236 := bits.add_u64(x105, x231, u64(0x0))
	x237, _ := bits.add_u64(x106, x233, u64(fiat.u1(x236)))
	x239, x240 := bits.add_u64(x125, x235, u64(0x0))
	x241, _ := bits.add_u64(x126, x237, u64(fiat.u1(x240)))
	x243, x244 := bits.add_u64(x127, x239, u64(0x0))
	x245, _ := bits.add_u64(x128, x241, u64(fiat.u1(x244)))
	x247, x248 := bits.add_u64(x131, x243, u64(0x0))
	x249, _ := bits.add_u64(x132, x245, u64(fiat.u1(x248)))
	x251, x252 := bits.add_u64(x137, x247, u64(0x0))
	x253, _ := bits.add_u64(x138, x249, u64(fiat.u1(x252)))
	x255, x256 := bits.add_u64(x145, x251, u64(0x0))
	x257, _ := bits.add_u64(x146, x253, u64(fiat.u1(x256)))
	x259, x260 := bits.add_u64(x155, x255, u64(0x0))
	x261, _ := bits.add_u64(x156, x257, u64(fiat.u1(x260)))
	x263, x264 := bits.add_u64(x167, x259, u64(0x0))
	x265, _ := bits.add_u64(x168, x261, u64(fiat.u1(x264)))
	x267, x268 := bits.add_u64(x181, x263, u64(0x0))
	x269, _ := bits.add_u64(x182, x265, u64(fiat.u1(x268)))
	x271, x272 := bits.add_u64(x25, x13, u64(0x0))
	x273, _ := bits.add_u64(x26, x14, u64(fiat.u1(x272)))
	x275, x276 := bits.add_u64(x83, x271, u64(0x0))
	x277, _ := bits.add_u64(x84, x273, u64(fiat.u1(x276)))
	x279, x280 := bits.add_u64(x91, x275, u64(0x0))
	x281, _ := bits.add_u64(x92, x277, u64(fiat.u1(x280)))
	x283, x284 := bits.add_u64(x99, x279, u64(0x0))
	x285, _ := bits.add_u64(x100, x281, u64(fiat.u1(x284)))
	x287, x288 := bits.add_u64(x107, x283, u64(0x0))
	x289, _ := bits.add_u64(x108, x285, u64(fiat.u1(x288)))
	x291, x292 := bits.add_u64(x113, x287, u64(0x0))
	x293, _ := bits.add_u64(x114, x289, u64(fiat.u1(x292)))
	x295, x296 := bits.add_u64(x129, x291, u64(0x0))
	x297, _ := bits.add_u64(x130, x293, u64(fiat.u1(x296)))
	x299, x300 := bits.add_u64(x133, x295, u64(0x0))
	x301, _ := bits.add_u64(x134, x297, u64(fiat.u1(x300)))
	x303, x304 := bits.add_u64(x139, x299, u64(0x0))
	x305, _ := bits.add_u64(x140, x301, u64(fiat.u1(x304)))
	x307, x308 := bits.add_u64(x147, x303, u64(0x0))
	x309, _ := bits.add_u64(x148, x305, u64(fiat.u1(x308)))
	x311, x312 := bits.add_u64(x157, x307, u64(0x0))
	x313, _ := bits.add_u64(x158, x309, u64(fiat.u1(x312)))
	x315, x316 := bits.add_u64(x169, x311, u64(0x0))
	x317, _ := bits.add_u64(x170, x313, u64(fiat.u1(x316)))
	x319, x320 := bits.add_u64(x183, x315, u64(0x0))
	x321, _ := bits.add_u64(x184, x317, u64(fiat.u1(x320)))
	x323, x324 := bits.add_u64(x19, x15, u64(0x0))
	x325, _ := bits.add_u64(x20, x16, u64(fiat.u1(x324)))
	x327, x328 := bits.add_u64(x27, x323, u64(0x0))
	x329, _ := bits.add_u64(x28, x325, u64(fiat.u1(x328)))
	x331, x332 := bits.add_u64(x39, x327, u64(0x0))
	x333, _ := bits.add_u64(x40, x329, u64(fiat.u1(x332)))
	x335, x336 := bits.add_u64(x85, x331, u64(0x0))
	x337, _ := bits.add_u64(x86, x333, u64(fiat.u1(x336)))
	x339, x340 := bits.add_u64(x93, x335, u64(0x0))
	x341, _ := bits.add_u64(x94, x337, u64(fiat.u1(x340)))
	x343, x344 := bits.add_u64(x101, x339, u64(0x0))
	x345, _ := bits.add_u64(x102, x341, u64(fiat.u1(x344)))
	x347, x348 := bits.add_u64(x109, x343, u64(0x0))
	x349, _ := bits.add_u64(x110, x345, u64(fiat.u1(x348)))
	x351, x352 := bits.add_u64(x115, x347, u64(0x0))
	x353, _ := bits.add_u64(x116, x349, u64(fiat.u1(x352)))
	x355, x356 := bits.add_u64(x119, x351, u64(0x0))
	x357, _ := bits.add_u64(x120, x353, u64(fiat.u1(x356)))
	x359, x360 := bits.add_u64(x135, x355, u64(0x0))
	x361, _ := bits.add_u64(x136, x357, u64(fiat.u1(x360)))
	x363, x364 := bits.add_u64(x141, x359, u64(0x0))
	x365, _ := bits.add_u64(x142, x361, u64(fiat.u1(x364)))
	x367, x368 := bits.add_u64(x149, x363, u64(0x0))
	x369, _ := bits.add_u64(x150, x365, u64(fiat.u1(x368)))
	x371, x372 := bits.add_u64(x159, x367, u64(0x0))
	x373, _ := bits.add_u64(x160, x369, u64(fiat.u1(x372)))
	x375, x376 := bits.add_u64(x171, x371, u64(0x0))
	x377, _ := bits.add_u64(x172, x373, u64(fiat.u1(x376)))
	x379, x380 := bits.add_u64(x185, x375, u64(0x0))
	x381, _ := bits.add_u64(x186, x377, u64(fiat.u1(x380)))
	x383, x384 := bits.add_u64(x21, x17, u64(0x0))
	x385, _ := bits.add_u64(x22, x18, u64(fiat.u1(x384)))
	x387, x388 := bits.add_u64(x23, x383, u64(0x0))
	x389, _ := bits.add_u64(x24, x385, u64(fiat.u1(x388)))
	x391, x392 := bits.add_u64(x29, x387, u64(0x0))
	x393, _ := bits.add_u64(x30, x389, u64(fiat.u1(x392)))
	x395, x396 := bits.add_u64(x41, x391, u64(0x0))
	x397, _ := bits.add_u64(x42, x393, u64(fiat.u1(x396)))
	x399, x400 := bits.add_u64(x51, x395, u64(0x0))
	x401, _ := bits.add_u64(x52, x397, u64(fiat.u1(x400)))
	x403, x404 := bits.add_u64(x87, x399, u64(0x0))
	x405, _ := bits.add_u64(x88, x401, u64(fiat.u1(x404)))
	x407, x408 := bits.add_u64(x95, x403, u64(0x0))
	x409, _ := bits.add_u64(x96, x405, u64(fiat.u1(x408)))
	x411, x412 := bits.add_u64(x103, x407, u64(0x0))
	x413, _ := bits.add_u64(x104, x409, u64(fiat.u1(x412)))
	x415, x416 := bits.add_u64(x111, x411, u64(0x0))
	x417, _ := bits.add_u64(x112, x413, u64(fiat.u1(x416)))
	x419, x420 := bits.add_u64(x117, x415, u64(0x0))
	x421, _ := bits.add_u64(x118, x417, u64(fiat.u1(x420)))
	x423, x424 := bits.add_u64(x121, x419, u64(0x0))
	x425, _ := bits.add_u64(x122, x421, u64(fiat.u1(x424)))
	x427, x428 := bits.add_u64(x123, x423, u64(0x0))
	x429, _ := bits.add_u64(x124, x425, u64(fiat.u1(x428)))
	x431, x432 := bits.add_u64(x143, x427, u64(0x0))
	x433, _ := bits.add_u64(x144, x429, u64(fiat.u1(x432)))
	x435, x436 := bits.add_u64(x151, x431, u64(0x0))
	x437, _ := bits.add_u64(x152, x433, u64(fiat.u1(x436)))
	x439, x440 := bits.add_u64(x161, x435, u64(0x0))
	x441, _ := bits.add_u64(x162, x437, u64(fiat.u1(x440)))
	x443, x444 := bits.add_u64(x173, x439, u64(0x0))
	x445, _ := bits.add_u64(x174, x441, u64(fiat.u1(x444)))
	x447, x448 := bits.add_u64(x187, x443, u64(0x0))
	x449, _ := bits.add_u64(x188, x445, u64(fiat.u1(x448)))
	x451, x452 := bits.add_u64(x33, x1, u64(0x0))
	x453, _ := bits.add_u64(x34, x2, u64(fiat.u1(x452)))
	x455, x456 := bits.add_u64(x45, x451, u64(0x0))
	x457, _ := bits.add_u64(x46, x453, u64(fiat.u1(x456)))
	x459, x460 := bits.add_u64(x55, x455, u64(0x0))
	x461, _ := bits.add_u64(x56, x457, u64(fiat.u1(x460)))
	x463, x464 := bits.add_u64(x63, x459, u64(0x0))
	x465, _ := bits.add_u64(x64, x461, u64(fiat.u1(x464)))
	x467, x468 := bits.add_u64(x69, x463, u64(0x0))
	x469, _ := bits.add_u64(x70, x465, u64(fiat.u1(x468)))
	x471, x472 := bits.add_u64(x165, x467, u64(0x0))
	x473, _ := bits.add_u64(x166, x469, u64(fiat.u1(x472)))
	x475, x476 := bits.add_u64(x177, x471, u64(0x0))
	x477, _ := bits.add_u64(x178, x473, u64(fiat.u1(x476)))
	x479, x480 := bits.add_u64(x191, x475, u64(0x0))
	x481, _ := bits.add_u64(x192, x477, u64(fiat.u1(x480)))
	x483, x484 := bits.add_u64(x7, x3, u64(0x0))
	x485, _ := bits.add_u64(x8, x4, u64(fiat.u1(x484)))
	x487, x488 := bits.add_u64(x35, x483, u64(0x0))
	x489, _ := bits.add_u64(x36, x485, u64(fiat.u1(x488)))
	x491, x492 := bits.add_u64(x47, x487, u64(0x0))
	x493, _ := bits.add_u64(x48, x489, u64(fiat.u1(x492)))
	x495, x496 := bits.add_u64(x57, x491, u64(0x0))
	x497, _ := bits.add_u64(x58, x493, u64(fiat.u1(x496)))
	x499, x500 := bits.add_u64(x65, x495, u64(0x0))
	x501, _ := bits.add_u64(x66, x497, u64(fiat.u1(x500)))
	x503, x504 := bits.add_u64(x71, x499, u64(0x0))
	x505, _ := bits.add_u64(x72, x501, u64(fiat.u1(x504)))
	x507, x508 := bits.add_u64(x75, x503, u64(0x0))
	x509, _ := bits.add_u64(x76, x505, u64(fiat.u1(x508)))
	x511, x512 := bits.add_u64(x179, x507, u64(0x0))
	x513, _ := bits.add_u64(x180, x509, u64(fiat.u1(x512)))
	x515, x516 := bits.add_u64(x193, x511, u64(0x0))
	x517, _ := bits.add_u64(x194, x513, u64(fiat.u1(x516)))
	x519, x520 := bits.add_u64(x9, x5, u64(0x0))
	x521, _ := bits.add_u64(x10, x6, u64(fiat.u1(x520)))
	x523, x524 := bits.add_u64(x11, x519, u64(0x0))
	x525, _ := bits.add_u64(x12, x521, u64(fiat.u1(x524)))
	x527, x528 := bits.add_u64(x37, x523, u64(0x0))
	x529, _ := bits.add_u64(x38, x525, u64(fiat.u1(x528)))
	x531, x532 := bits.add_u64(x49, x527, u64(0x0))
	x533, _ := bits.add_u64(x50, x529, u64(fiat.u1(x532)))
	x535, x536 := bits.add_u64(x59, x531, u64(0x0))
	x537, _ := bits.add_u64(x60, x533, u64(fiat.u1(x536)))
	x539, x540 := bits.add_u64(x67, x535, u64(0x0))
	x541, _ := bits.add_u64(x68, x537, u64(fiat.u1(x540)))
	x543, x544 := bits.add_u64(x73, x539, u64(0x0))
	x545, _ := bits.add_u64(x74, x541, u64(fiat.u1(x544)))
	x547, x548 := bits.add_u64(x77, x543, u64(0x0))
	x549, _ := bits.add_u64(x78, x545, u64(fiat.u1(x548)))
	x551, x552 := bits.add_u64(x79, x547, u64(0x0))
	x553, _ := bits.add_u64(x80, x549, u64(fiat.u1(x552)))
	x555, x556 := bits.add_u64(x195, x551, u64(0x0))
	x557, _ := bits.add_u64(x196, x553, u64(fiat.u1(x556)))
	x559, x560 := bits.add_u64(x225, x447, u64(0x0))
	x561 := (u64(fiat.u1(x560)) + x449)
	x562 := ((x267 >> 56) | ((x269 << 8) & 0xffffffffffffffff))
	x563 := (x267 & 0xffffffffffffff)
	x564, x565 := bits.add_u64(x559, x562, u64(0x0))
	x566 := (u64(fiat.u1(x565)) + x561)
	x567 := ((x564 >> 56) | ((x566 << 8) & 0xffffffffffffffff))
	x568 := (x564 & 0xffffffffffffff)
	x569, x570 := bits.add_u64(x555, x562, u64(0x0))
	x571 := (u64(fiat.u1(x570)) + x557)
	x572, x573 := bits.add_u64(x567, x379, u64(0x0))
	x574 := (u64(fiat.u1(x573)) + x381)
	x575 := ((x569 >> 56) | ((x571 << 8) & 0xffffffffffffffff))
	x576 := (x569 & 0xffffffffffffff)
	x577, x578 := bits.add_u64(x575, x515, u64(0x0))
	x579 := (u64(fiat.u1(x578)) + x517)
	x580 := ((x572 >> 56) | ((x574 << 8) & 0xffffffffffffffff))
	x581 := (x572 & 0xffffffffffffff)
	x582, x583 := bits.add_u64(x580, x319, u64(0x0))
	x584 := (u64(fiat.u1(x583)) + x321)
	x585 := ((x577 >> 56) | ((x579 << 8) & 0xffffffffffffffff))
	x586 := (x577 & 0xffffffffffffff)
	x587, x588 := bits.add_u64(x585, x479, u64(0x0))
	x589 := (u64(fiat.u1(x588)) + x481)
	x590 := ((x582 >> 56) | ((x584 << 8) & 0xffffffffffffffff))
	x591 := (x582 & 0xffffffffffffff)
	x592 := (x590 + x563)
	x593 := ((x587 >> 56) | ((x589 << 8) & 0xffffffffffffffff))
	x594 := (x587 & 0xffffffffffffff)
	x595 := (x593 + x226)
	x596 := (x592 >> 56)
	x597 := (x592 & 0xffffffffffffff)
	x598 := (x595 >> 56)
	x599 := (x595 & 0xffffffffffffff)
	x600 := (x568 + x596)
	x601 := (x576 + x596)
	x602 := (x598 + x600)
	x603 := fiat.u1((x602 >> 56))
	x604 := (x602 & 0xffffffffffffff)
	x605 := (u64(x603) + x581)
	x606 := fiat.u1((x601 >> 56))
	x607 := (x601 & 0xffffffffffffff)
	x608 := (u64(x606) + x586)
	out1[0] = x607
	out1[1] = x608
	out1[2] = x594
	out1[3] = x599
	out1[4] = x604
	out1[5] = x605
	out1[6] = x591
	out1[7] = x597
}

fe_carry_square :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
	x1 := arg1[7]
	x2 := arg1[7]
	x3 := (x1 * 0x2)
	x4 := (x2 * 0x2)
	x5 := (arg1[7] * 0x2)
	x6 := arg1[6]
	x7 := arg1[6]
	x8 := (x6 * 0x2)
	x9 := (x7 * 0x2)
	x10 := (arg1[6] * 0x2)
	x11 := arg1[5]
	x12 := arg1[5]
	x13 := (x11 * 0x2)
	x14 := (x12 * 0x2)
	x15 := (arg1[5] * 0x2)
	x16 := arg1[4]
	x17 := arg1[4]
	x18 := (arg1[4] * 0x2)
	x19 := (arg1[3] * 0x2)
	x20 := (arg1[2] * 0x2)
	x21 := (arg1[1] * 0x2)
	x23, x22 := bits.mul_u64(arg1[7], x1)
	x25, x24 := bits.mul_u64(arg1[6], x3)
	x27, x26 := bits.mul_u64(arg1[6], x6)
	x29, x28 := bits.mul_u64(arg1[5], x3)
	x31, x30 := bits.mul_u64(arg1[7], x1)
	x33, x32 := bits.mul_u64(arg1[6], x3)
	x35, x34 := bits.mul_u64(arg1[6], x6)
	x37, x36 := bits.mul_u64(arg1[5], x3)
	x39, x38 := bits.mul_u64(arg1[7], x2)
	x41, x40 := bits.mul_u64(arg1[6], x4)
	x43, x42 := bits.mul_u64(arg1[6], x7)
	x45, x44 := bits.mul_u64(arg1[5], x4)
	x47, x46 := bits.mul_u64(arg1[5], x9)
	x49, x48 := bits.mul_u64(arg1[5], x8)
	x51, x50 := bits.mul_u64(arg1[5], x12)
	x53, x52 := bits.mul_u64(arg1[5], x11)
	x55, x54 := bits.mul_u64(arg1[4], x4)
	x57, x56 := bits.mul_u64(arg1[4], x3)
	x59, x58 := bits.mul_u64(arg1[4], x9)
	x61, x60 := bits.mul_u64(arg1[4], x8)
	x63, x62 := bits.mul_u64(arg1[4], x14)
	x65, x64 := bits.mul_u64(arg1[4], x13)
	x67, x66 := bits.mul_u64(arg1[4], x17)
	x69, x68 := bits.mul_u64(arg1[4], x16)
	x71, x70 := bits.mul_u64(arg1[3], x4)
	x73, x72 := bits.mul_u64(arg1[3], x3)
	x75, x74 := bits.mul_u64(arg1[3], x9)
	x77, x76 := bits.mul_u64(arg1[3], x8)
	x79, x78 := bits.mul_u64(arg1[3], x14)
	x81, x80 := bits.mul_u64(arg1[3], x13)
	x83, x82 := bits.mul_u64(arg1[3], x18)
	x85, x84 := bits.mul_u64(arg1[3], arg1[3])
	x87, x86 := bits.mul_u64(arg1[2], x4)
	x89, x88 := bits.mul_u64(arg1[2], x3)
	x91, x90 := bits.mul_u64(arg1[2], x9)
	x93, x92 := bits.mul_u64(arg1[2], x8)
	x95, x94 := bits.mul_u64(arg1[2], x15)
	x97, x96 := bits.mul_u64(arg1[2], x18)
	x99, x98 := bits.mul_u64(arg1[2], x19)
	x101, x100 := bits.mul_u64(arg1[2], arg1[2])
	x103, x102 := bits.mul_u64(arg1[1], x4)
	x105, x104 := bits.mul_u64(arg1[1], x3)
	x107, x106 := bits.mul_u64(arg1[1], x10)
	x109, x108 := bits.mul_u64(arg1[1], x15)
	x111, x110 := bits.mul_u64(arg1[1], x18)
	x113, x112 := bits.mul_u64(arg1[1], x19)
	x115, x114 := bits.mul_u64(arg1[1], x20)
	x117, x116 := bits.mul_u64(arg1[1], arg1[1])
	x119, x118 := bits.mul_u64(arg1[0], x5)
	x121, x120 := bits.mul_u64(arg1[0], x10)
	x123, x122 := bits.mul_u64(arg1[0], x15)
	x125, x124 := bits.mul_u64(arg1[0], x18)
	x127, x126 := bits.mul_u64(arg1[0], x19)
	x129, x128 := bits.mul_u64(arg1[0], x20)
	x131, x130 := bits.mul_u64(arg1[0], x21)
	x133, x132 := bits.mul_u64(arg1[0], arg1[0])
	x134, x135 := bits.add_u64(x54, x46, u64(0x0))
	x136, _ := bits.add_u64(x55, x47, u64(fiat.u1(x135)))
	x138, x139 := bits.add_u64(x114, x134, u64(0x0))
	x140, _ := bits.add_u64(x115, x136, u64(fiat.u1(x139)))
	x142, x143 := bits.add_u64(x126, x138, u64(0x0))
	x144, _ := bits.add_u64(x127, x140, u64(fiat.u1(x143)))
	x146 := ((x142 >> 56) | ((x144 << 8) & 0xffffffffffffffff))
	x147 := (x142 & 0xffffffffffffff)
	x148, x149 := bits.add_u64(x56, x48, u64(0x0))
	x150, _ := bits.add_u64(x57, x49, u64(fiat.u1(x149)))
	x152, x153 := bits.add_u64(x82, x148, u64(0x0))
	x154, _ := bits.add_u64(x83, x150, u64(fiat.u1(x153)))
	x156, x157 := bits.add_u64(x94, x152, u64(0x0))
	x158, _ := bits.add_u64(x95, x154, u64(fiat.u1(x157)))
	x160, x161 := bits.add_u64(x106, x156, u64(0x0))
	x162, _ := bits.add_u64(x107, x158, u64(fiat.u1(x161)))
	x164, x165 := bits.add_u64(x118, x160, u64(0x0))
	x166, _ := bits.add_u64(x119, x162, u64(fiat.u1(x165)))
	x168, x169 := bits.add_u64(x38, x30, u64(0x0))
	x170, _ := bits.add_u64(x39, x31, u64(fiat.u1(x169)))
	x172, x173 := bits.add_u64(x52, x168, u64(0x0))
	x174, _ := bits.add_u64(x53, x170, u64(fiat.u1(x173)))
	x176, x177 := bits.add_u64(x60, x172, u64(0x0))
	x178, _ := bits.add_u64(x61, x174, u64(fiat.u1(x177)))
	x180, x181 := bits.add_u64(x72, x176, u64(0x0))
	x182, _ := bits.add_u64(x73, x178, u64(fiat.u1(x181)))
	x184, x185 := bits.add_u64(x84, x180, u64(0x0))
	x186, _ := bits.add_u64(x85, x182, u64(fiat.u1(x185)))
	x188, x189 := bits.add_u64(x96, x184, u64(0x0))
	x190, _ := bits.add_u64(x97, x186, u64(fiat.u1(x189)))
	x192, x193 := bits.add_u64(x108, x188, u64(0x0))
	x194, _ := bits.add_u64(x109, x190, u64(fiat.u1(x193)))
	x196, x197 := bits.add_u64(x120, x192, u64(0x0))
	x198, _ := bits.add_u64(x121, x194, u64(fiat.u1(x197)))
	x200, x201 := bits.add_u64(x40, x32, u64(0x0))
	x202, _ := bits.add_u64(x41, x33, u64(fiat.u1(x201)))
	x204, x205 := bits.add_u64(x64, x200, u64(0x0))
	x206, _ := bits.add_u64(x65, x202, u64(fiat.u1(x205)))
	x208, x209 := bits.add_u64(x76, x204, u64(0x0))
	x210, _ := bits.add_u64(x77, x206, u64(fiat.u1(x209)))
	x212, x213 := bits.add_u64(x88, x208, u64(0x0))
	x214, _ := bits.add_u64(x89, x210, u64(fiat.u1(x213)))
	x216, x217 := bits.add_u64(x98, x212, u64(0x0))
	x218, _ := bits.add_u64(x99, x214, u64(fiat.u1(x217)))
	x220, x221 := bits.add_u64(x110, x216, u64(0x0))
	x222, _ := bits.add_u64(x111, x218, u64(fiat.u1(x221)))
	x224, x225 := bits.add_u64(x122, x220, u64(0x0))
	x226, _ := bits.add_u64(x123, x222, u64(fiat.u1(x225)))
	x228, x229 := bits.add_u64(x36, x34, u64(0x0))
	x230, _ := bits.add_u64(x37, x35, u64(fiat.u1(x229)))
	x232, x233 := bits.add_u64(x42, x228, u64(0x0))
	x234, _ := bits.add_u64(x43, x230, u64(fiat.u1(x233)))
	x236, x237 := bits.add_u64(x44, x232, u64(0x0))
	x238, _ := bits.add_u64(x45, x234, u64(fiat.u1(x237)))
	x240, x241 := bits.add_u64(x68, x236, u64(0x0))
	x242, _ := bits.add_u64(x69, x238, u64(fiat.u1(x241)))
	x244, x245 := bits.add_u64(x80, x240, u64(0x0))
	x246, _ := bits.add_u64(x81, x242, u64(fiat.u1(x245)))
	x248, x249 := bits.add_u64(x92, x244, u64(0x0))
	x250, _ := bits.add_u64(x93, x246, u64(fiat.u1(x249)))
	x252, x253 := bits.add_u64(x100, x248, u64(0x0))
	x254, _ := bits.add_u64(x101, x250, u64(fiat.u1(x253)))
	x256, x257 := bits.add_u64(x104, x252, u64(0x0))
	x258, _ := bits.add_u64(x105, x254, u64(fiat.u1(x257)))
	x260, x261 := bits.add_u64(x112, x256, u64(0x0))
	x262, _ := bits.add_u64(x113, x258, u64(fiat.u1(x261)))
	x264, x265 := bits.add_u64(x124, x260, u64(0x0))
	x266, _ := bits.add_u64(x125, x262, u64(fiat.u1(x265)))
	x268, x269 := bits.add_u64(x50, x22, u64(0x0))
	x270, _ := bits.add_u64(x51, x23, u64(fiat.u1(x269)))
	x272, x273 := bits.add_u64(x58, x268, u64(0x0))
	x274, _ := bits.add_u64(x59, x270, u64(fiat.u1(x273)))
	x276, x277 := bits.add_u64(x70, x272, u64(0x0))
	x278, _ := bits.add_u64(x71, x274, u64(fiat.u1(x277)))
	x280, x281 := bits.add_u64(x116, x276, u64(0x0))
	x282, _ := bits.add_u64(x117, x278, u64(fiat.u1(x281)))
	x284, x285 := bits.add_u64(x128, x280, u64(0x0))
	x286, _ := bits.add_u64(x129, x282, u64(fiat.u1(x285)))
	x288, x289 := bits.add_u64(x62, x24, u64(0x0))
	x290, _ := bits.add_u64(x63, x25, u64(fiat.u1(x289)))
	x292, x293 := bits.add_u64(x74, x288, u64(0x0))
	x294, _ := bits.add_u64(x75, x290, u64(fiat.u1(x293)))
	x296, x297 := bits.add_u64(x86, x292, u64(0x0))
	x298, _ := bits.add_u64(x87, x294, u64(fiat.u1(x297)))
	x300, x301 := bits.add_u64(x130, x296, u64(0x0))
	x302, _ := bits.add_u64(x131, x298, u64(fiat.u1(x301)))
	x304, x305 := bits.add_u64(x28, x26, u64(0x0))
	x306, _ := bits.add_u64(x29, x27, u64(fiat.u1(x305)))
	x308, x309 := bits.add_u64(x66, x304, u64(0x0))
	x310, _ := bits.add_u64(x67, x306, u64(fiat.u1(x309)))
	x312, x313 := bits.add_u64(x78, x308, u64(0x0))
	x314, _ := bits.add_u64(x79, x310, u64(fiat.u1(x313)))
	x316, x317 := bits.add_u64(x90, x312, u64(0x0))
	x318, _ := bits.add_u64(x91, x314, u64(fiat.u1(x317)))
	x320, x321 := bits.add_u64(x102, x316, u64(0x0))
	x322, _ := bits.add_u64(x103, x318, u64(fiat.u1(x321)))
	x324, x325 := bits.add_u64(x132, x320, u64(0x0))
	x326, _ := bits.add_u64(x133, x322, u64(fiat.u1(x325)))
	x328, x329 := bits.add_u64(x146, x264, u64(0x0))
	x330 := (u64(fiat.u1(x329)) + x266)
	x331 := ((x164 >> 56) | ((x166 << 8) & 0xffffffffffffffff))
	x332 := (x164 & 0xffffffffffffff)
	x333, x334 := bits.add_u64(x328, x331, u64(0x0))
	x335 := (u64(fiat.u1(x334)) + x330)
	x336 := ((x333 >> 56) | ((x335 << 8) & 0xffffffffffffffff))
	x337 := (x333 & 0xffffffffffffff)
	x338, x339 := bits.add_u64(x324, x331, u64(0x0))
	x340 := (u64(fiat.u1(x339)) + x326)
	x341, x342 := bits.add_u64(x336, x224, u64(0x0))
	x343 := (u64(fiat.u1(x342)) + x226)
	x344 := ((x338 >> 56) | ((x340 << 8) & 0xffffffffffffffff))
	x345 := (x338 & 0xffffffffffffff)
	x346, x347 := bits.add_u64(x344, x300, u64(0x0))
	x348 := (u64(fiat.u1(x347)) + x302)
	x349 := ((x341 >> 56) | ((x343 << 8) & 0xffffffffffffffff))
	x350 := (x341 & 0xffffffffffffff)
	x351, x352 := bits.add_u64(x349, x196, u64(0x0))
	x353 := (u64(fiat.u1(x352)) + x198)
	x354 := ((x346 >> 56) | ((x348 << 8) & 0xffffffffffffffff))
	x355 := (x346 & 0xffffffffffffff)
	x356, x357 := bits.add_u64(x354, x284, u64(0x0))
	x358 := (u64(fiat.u1(x357)) + x286)
	x359 := ((x351 >> 56) | ((x353 << 8) & 0xffffffffffffffff))
	x360 := (x351 & 0xffffffffffffff)
	x361 := (x359 + x332)
	x362 := ((x356 >> 56) | ((x358 << 8) & 0xffffffffffffffff))
	x363 := (x356 & 0xffffffffffffff)
	x364 := (x362 + x147)
	x365 := (x361 >> 56)
	x366 := (x361 & 0xffffffffffffff)
	x367 := (x364 >> 56)
	x368 := (x364 & 0xffffffffffffff)
	x369 := (x337 + x365)
	x370 := (x345 + x365)
	x371 := (x367 + x369)
	x372 := fiat.u1((x371 >> 56))
	x373 := (x371 & 0xffffffffffffff)
	x374 := (u64(x372) + x350)
	x375 := fiat.u1((x370 >> 56))
	x376 := (x370 & 0xffffffffffffff)
	x377 := (u64(x375) + x355)
	out1[0] = x376
	out1[1] = x377
	out1[2] = x363
	out1[3] = x368
	out1[4] = x373
	out1[5] = x374
	out1[6] = x360
	out1[7] = x366
}

fe_carry :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
	x1 := arg1[3]
	x2 := arg1[7]
	x3 := (x2 >> 56)
	x4 := (((x1 >> 56) + arg1[4]) + x3)
	x5 := (arg1[0] + x3)
	x6 := ((x4 >> 56) + arg1[5])
	x7 := ((x5 >> 56) + arg1[1])
	x8 := ((x6 >> 56) + arg1[6])
	x9 := ((x7 >> 56) + arg1[2])
	x10 := ((x8 >> 56) + (x2 & 0xffffffffffffff))
	x11 := ((x9 >> 56) + (x1 & 0xffffffffffffff))
	x12 := fiat.u1((x10 >> 56))
	x13 := ((x5 & 0xffffffffffffff) + u64(x12))
	x14 := (u64(fiat.u1((x11 >> 56))) + ((x4 & 0xffffffffffffff) + u64(x12)))
	x15 := (x13 & 0xffffffffffffff)
	x16 := (u64(fiat.u1((x13 >> 56))) + (x7 & 0xffffffffffffff))
	x17 := (x9 & 0xffffffffffffff)
	x18 := (x11 & 0xffffffffffffff)
	x19 := (x14 & 0xffffffffffffff)
	x20 := (u64(fiat.u1((x14 >> 56))) + (x6 & 0xffffffffffffff))
	x21 := (x8 & 0xffffffffffffff)
	x22 := (x10 & 0xffffffffffffff)
	out1[0] = x15
	out1[1] = x16
	out1[2] = x17
	out1[3] = x18
	out1[4] = x19
	out1[5] = x20
	out1[6] = x21
	out1[7] = x22
}

fe_add :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
	x1 := (arg1[0] + arg2[0])
	x2 := (arg1[1] + arg2[1])
	x3 := (arg1[2] + arg2[2])
	x4 := (arg1[3] + arg2[3])
	x5 := (arg1[4] + arg2[4])
	x6 := (arg1[5] + arg2[5])
	x7 := (arg1[6] + arg2[6])
	x8 := (arg1[7] + arg2[7])
	out1[0] = x1
	out1[1] = x2
	out1[2] = x3
	out1[3] = x4
	out1[4] = x5
	out1[5] = x6
	out1[6] = x7
	out1[7] = x8
}

fe_sub :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
	x1 := ((0x1fffffffffffffe + arg1[0]) - arg2[0])
	x2 := ((0x1fffffffffffffe + arg1[1]) - arg2[1])
	x3 := ((0x1fffffffffffffe + arg1[2]) - arg2[2])
	x4 := ((0x1fffffffffffffe + arg1[3]) - arg2[3])
	x5 := ((0x1fffffffffffffc + arg1[4]) - arg2[4])
	x6 := ((0x1fffffffffffffe + arg1[5]) - arg2[5])
	x7 := ((0x1fffffffffffffe + arg1[6]) - arg2[6])
	x8 := ((0x1fffffffffffffe + arg1[7]) - arg2[7])
	out1[0] = x1
	out1[1] = x2
	out1[2] = x3
	out1[3] = x4
	out1[4] = x5
	out1[5] = x6
	out1[6] = x7
	out1[7] = x8
}

fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
	x1 := (0x1fffffffffffffe - arg1[0])
	x2 := (0x1fffffffffffffe - arg1[1])
	x3 := (0x1fffffffffffffe - arg1[2])
	x4 := (0x1fffffffffffffe - arg1[3])
	x5 := (0x1fffffffffffffc - arg1[4])
	x6 := (0x1fffffffffffffe - arg1[5])
	x7 := (0x1fffffffffffffe - arg1[6])
	x8 := (0x1fffffffffffffe - arg1[7])
	out1[0] = x1
	out1[1] = x2
	out1[2] = x3
	out1[3] = x4
	out1[4] = x5
	out1[5] = x6
	out1[6] = x7
	out1[7] = x8
}

@(optimization_mode = "none")
fe_cond_assign :: #force_no_inline proc "contextless" (
	out1, arg1: ^Tight_Field_Element,
	arg2: int,
) {
	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
	x4 := fiat.cmovznz_u64(fiat.u1(arg2), out1[3], arg1[3])
	x5 := fiat.cmovznz_u64(fiat.u1(arg2), out1[4], arg1[4])
	x6 := fiat.cmovznz_u64(fiat.u1(arg2), out1[5], arg1[5])
	x7 := fiat.cmovznz_u64(fiat.u1(arg2), out1[6], arg1[6])
	x8 := fiat.cmovznz_u64(fiat.u1(arg2), out1[7], arg1[7])
	out1[0] = x1
	out1[1] = x2
	out1[2] = x3
	out1[3] = x4
	out1[4] = x5
	out1[5] = x6
	out1[6] = x7
	out1[7] = x8
}

fe_to_bytes :: proc "contextless" (out1: ^[56]byte, arg1: ^Tight_Field_Element) {
	x1, x2 := _subborrowx_u56(0x0, arg1[0], 0xffffffffffffff)
	x3, x4 := _subborrowx_u56(x2, arg1[1], 0xffffffffffffff)
	x5, x6 := _subborrowx_u56(x4, arg1[2], 0xffffffffffffff)
	x7, x8 := _subborrowx_u56(x6, arg1[3], 0xffffffffffffff)
	x9, x10 := _subborrowx_u56(x8, arg1[4], 0xfffffffffffffe)
	x11, x12 := _subborrowx_u56(x10, arg1[5], 0xffffffffffffff)
	x13, x14 := _subborrowx_u56(x12, arg1[6], 0xffffffffffffff)
	x15, x16 := _subborrowx_u56(x14, arg1[7], 0xffffffffffffff)
	x17 := fiat.cmovznz_u64(x16, u64(0x0), 0xffffffffffffffff)
	x18, x19 := _addcarryx_u56(0x0, x1, (x17 & 0xffffffffffffff))
	x20, x21 := _addcarryx_u56(x19, x3, (x17 & 0xffffffffffffff))
	x22, x23 := _addcarryx_u56(x21, x5, (x17 & 0xffffffffffffff))
	x24, x25 := _addcarryx_u56(x23, x7, (x17 & 0xffffffffffffff))
	x26, x27 := _addcarryx_u56(x25, x9, (x17 & 0xfffffffffffffe))
	x28, x29 := _addcarryx_u56(x27, x11, (x17 & 0xffffffffffffff))
	x30, x31 := _addcarryx_u56(x29, x13, (x17 & 0xffffffffffffff))
	x32, _ := _addcarryx_u56(x31, x15, (x17 & 0xffffffffffffff))
	x34 := (u8(x18) & 0xff)
	x35 := (x18 >> 8)
	x36 := (u8(x35) & 0xff)
	x37 := (x35 >> 8)
	x38 := (u8(x37) & 0xff)
	x39 := (x37 >> 8)
	x40 := (u8(x39) & 0xff)
	x41 := (x39 >> 8)
	x42 := (u8(x41) & 0xff)
	x43 := (x41 >> 8)
	x44 := (u8(x43) & 0xff)
	x45 := u8((x43 >> 8))
	x46 := (u8(x20) & 0xff)
	x47 := (x20 >> 8)
	x48 := (u8(x47) & 0xff)
	x49 := (x47 >> 8)
	x50 := (u8(x49) & 0xff)
	x51 := (x49 >> 8)
	x52 := (u8(x51) & 0xff)
	x53 := (x51 >> 8)
	x54 := (u8(x53) & 0xff)
	x55 := (x53 >> 8)
	x56 := (u8(x55) & 0xff)
	x57 := u8((x55 >> 8))
	x58 := (u8(x22) & 0xff)
	x59 := (x22 >> 8)
	x60 := (u8(x59) & 0xff)
	x61 := (x59 >> 8)
	x62 := (u8(x61) & 0xff)
	x63 := (x61 >> 8)
	x64 := (u8(x63) & 0xff)
	x65 := (x63 >> 8)
	x66 := (u8(x65) & 0xff)
	x67 := (x65 >> 8)
	x68 := (u8(x67) & 0xff)
	x69 := u8((x67 >> 8))
	x70 := (u8(x24) & 0xff)
	x71 := (x24 >> 8)
	x72 := (u8(x71) & 0xff)
	x73 := (x71 >> 8)
	x74 := (u8(x73) & 0xff)
	x75 := (x73 >> 8)
	x76 := (u8(x75) & 0xff)
	x77 := (x75 >> 8)
	x78 := (u8(x77) & 0xff)
	x79 := (x77 >> 8)
	x80 := (u8(x79) & 0xff)
	x81 := u8((x79 >> 8))
	x82 := (u8(x26) & 0xff)
	x83 := (x26 >> 8)
	x84 := (u8(x83) & 0xff)
	x85 := (x83 >> 8)
	x86 := (u8(x85) & 0xff)
	x87 := (x85 >> 8)
	x88 := (u8(x87) & 0xff)
	x89 := (x87 >> 8)
	x90 := (u8(x89) & 0xff)
	x91 := (x89 >> 8)
	x92 := (u8(x91) & 0xff)
	x93 := u8((x91 >> 8))
	x94 := (u8(x28) & 0xff)
	x95 := (x28 >> 8)
	x96 := (u8(x95) & 0xff)
	x97 := (x95 >> 8)
	x98 := (u8(x97) & 0xff)
	x99 := (x97 >> 8)
	x100 := (u8(x99) & 0xff)
	x101 := (x99 >> 8)
	x102 := (u8(x101) & 0xff)
	x103 := (x101 >> 8)
	x104 := (u8(x103) & 0xff)
	x105 := u8((x103 >> 8))
	x106 := (u8(x30) & 0xff)
	x107 := (x30 >> 8)
	x108 := (u8(x107) & 0xff)
	x109 := (x107 >> 8)
	x110 := (u8(x109) & 0xff)
	x111 := (x109 >> 8)
	x112 := (u8(x111) & 0xff)
	x113 := (x111 >> 8)
	x114 := (u8(x113) & 0xff)
	x115 := (x113 >> 8)
	x116 := (u8(x115) & 0xff)
	x117 := u8((x115 >> 8))
	x118 := (u8(x32) & 0xff)
	x119 := (x32 >> 8)
	x120 := (u8(x119) & 0xff)
	x121 := (x119 >> 8)
	x122 := (u8(x121) & 0xff)
	x123 := (x121 >> 8)
	x124 := (u8(x123) & 0xff)
	x125 := (x123 >> 8)
	x126 := (u8(x125) & 0xff)
	x127 := (x125 >> 8)
	x128 := (u8(x127) & 0xff)
	x129 := u8((x127 >> 8))
	out1[0] = x34
	out1[1] = x36
	out1[2] = x38
	out1[3] = x40
	out1[4] = x42
	out1[5] = x44
	out1[6] = x45
	out1[7] = x46
	out1[8] = x48
	out1[9] = x50
	out1[10] = x52
	out1[11] = x54
	out1[12] = x56
	out1[13] = x57
	out1[14] = x58
	out1[15] = x60
	out1[16] = x62
	out1[17] = x64
	out1[18] = x66
	out1[19] = x68
	out1[20] = x69
	out1[21] = x70
	out1[22] = x72
	out1[23] = x74
	out1[24] = x76
	out1[25] = x78
	out1[26] = x80
	out1[27] = x81
	out1[28] = x82
	out1[29] = x84
	out1[30] = x86
	out1[31] = x88
	out1[32] = x90
	out1[33] = x92
	out1[34] = x93
	out1[35] = x94
	out1[36] = x96
	out1[37] = x98
	out1[38] = x100
	out1[39] = x102
	out1[40] = x104
	out1[41] = x105
	out1[42] = x106
	out1[43] = x108
	out1[44] = x110
	out1[45] = x112
	out1[46] = x114
	out1[47] = x116
	out1[48] = x117
	out1[49] = x118
	out1[50] = x120
	out1[51] = x122
	out1[52] = x124
	out1[53] = x126
	out1[54] = x128
	out1[55] = x129
}

fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[56]byte) {
	x1 := (u64(arg1[55]) << 48)
	x2 := (u64(arg1[54]) << 40)
	x3 := (u64(arg1[53]) << 32)
	x4 := (u64(arg1[52]) << 24)
	x5 := (u64(arg1[51]) << 16)
	x6 := (u64(arg1[50]) << 8)
	x7 := arg1[49]
	x8 := (u64(arg1[48]) << 48)
	x9 := (u64(arg1[47]) << 40)
	x10 := (u64(arg1[46]) << 32)
	x11 := (u64(arg1[45]) << 24)
	x12 := (u64(arg1[44]) << 16)
	x13 := (u64(arg1[43]) << 8)
	x14 := arg1[42]
	x15 := (u64(arg1[41]) << 48)
	x16 := (u64(arg1[40]) << 40)
	x17 := (u64(arg1[39]) << 32)
	x18 := (u64(arg1[38]) << 24)
	x19 := (u64(arg1[37]) << 16)
	x20 := (u64(arg1[36]) << 8)
	x21 := arg1[35]
	x22 := (u64(arg1[34]) << 48)
	x23 := (u64(arg1[33]) << 40)
	x24 := (u64(arg1[32]) << 32)
	x25 := (u64(arg1[31]) << 24)
	x26 := (u64(arg1[30]) << 16)
	x27 := (u64(arg1[29]) << 8)
	x28 := arg1[28]
	x29 := (u64(arg1[27]) << 48)
	x30 := (u64(arg1[26]) << 40)
	x31 := (u64(arg1[25]) << 32)
	x32 := (u64(arg1[24]) << 24)
	x33 := (u64(arg1[23]) << 16)
	x34 := (u64(arg1[22]) << 8)
	x35 := arg1[21]
	x36 := (u64(arg1[20]) << 48)
	x37 := (u64(arg1[19]) << 40)
	x38 := (u64(arg1[18]) << 32)
	x39 := (u64(arg1[17]) << 24)
	x40 := (u64(arg1[16]) << 16)
	x41 := (u64(arg1[15]) << 8)
	x42 := arg1[14]
	x43 := (u64(arg1[13]) << 48)
	x44 := (u64(arg1[12]) << 40)
	x45 := (u64(arg1[11]) << 32)
	x46 := (u64(arg1[10]) << 24)
	x47 := (u64(arg1[9]) << 16)
	x48 := (u64(arg1[8]) << 8)
	x49 := arg1[7]
	x50 := (u64(arg1[6]) << 48)
	x51 := (u64(arg1[5]) << 40)
	x52 := (u64(arg1[4]) << 32)
	x53 := (u64(arg1[3]) << 24)
	x54 := (u64(arg1[2]) << 16)
	x55 := (u64(arg1[1]) << 8)
	x56 := arg1[0]
	x57 := (x55 + u64(x56))
	x58 := (x54 + x57)
	x59 := (x53 + x58)
	x60 := (x52 + x59)
	x61 := (x51 + x60)
	x62 := (x50 + x61)
	x63 := (x48 + u64(x49))
	x64 := (x47 + x63)
	x65 := (x46 + x64)
	x66 := (x45 + x65)
	x67 := (x44 + x66)
	x68 := (x43 + x67)
	x69 := (x41 + u64(x42))
	x70 := (x40 + x69)
	x71 := (x39 + x70)
	x72 := (x38 + x71)
	x73 := (x37 + x72)
	x74 := (x36 + x73)
	x75 := (x34 + u64(x35))
	x76 := (x33 + x75)
	x77 := (x32 + x76)
	x78 := (x31 + x77)
	x79 := (x30 + x78)
	x80 := (x29 + x79)
	x81 := (x27 + u64(x28))
	x82 := (x26 + x81)
	x83 := (x25 + x82)
	x84 := (x24 + x83)
	x85 := (x23 + x84)
	x86 := (x22 + x85)
	x87 := (x20 + u64(x21))
	x88 := (x19 + x87)
	x89 := (x18 + x88)
	x90 := (x17 + x89)
	x91 := (x16 + x90)
	x92 := (x15 + x91)
	x93 := (x13 + u64(x14))
	x94 := (x12 + x93)
	x95 := (x11 + x94)
	x96 := (x10 + x95)
	x97 := (x9 + x96)
	x98 := (x8 + x97)
	x99 := (x6 + u64(x7))
	x100 := (x5 + x99)
	x101 := (x4 + x100)
	x102 := (x3 + x101)
	x103 := (x2 + x102)
	x104 := (x1 + x103)
	out1[0] = x62
	out1[1] = x68
	out1[2] = x74
	out1[3] = x80
	out1[4] = x86
	out1[5] = x92
	out1[6] = x98
	out1[7] = x104
}

fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
	x1 := arg1[0]
	x2 := arg1[1]
	x3 := arg1[2]
	x4 := arg1[3]
	x5 := arg1[4]
	x6 := arg1[5]
	x7 := arg1[6]
	x8 := arg1[7]
	out1[0] = x1
	out1[1] = x2
	out1[2] = x3
	out1[3] = x4
	out1[4] = x5
	out1[5] = x6
	out1[6] = x7
	out1[7] = x8
}
